
---
title: "Protest and Digital Adaptation"
date: "01/10/2021"
output:
  pdf_document: default
  html_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown
This file replicates the statistical analysis of "Protest and Digital Adaptation". 

Load the dataset (as put together in the data generating file) and required packages. 
```{r}

rm(list = ls())
library(readr)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(stargazer)
library(interplot)
library(lubridate)
library(lmtest)
library(janitor)
df <- read.csv(file.path("df.csv.gz"), stringsAsFactors = F)


#assign variable type
df$date <- as.Date(df$date)
df$country <- as.character(df$country)

#overview
table(df$protest)
summary(df$max_numparticipants)
summary(df$max_secengagement)

###assign repressive state enforcement
df$max_secengagement[df$max_secengagement == 0] <- NA
df$max_secengagement[df$max_secengagement == 1] <- 0  #for non-physical state enforcement
df$max_secengagement[df$max_secengagement >= 2] <- 1  #for physical state enforcement

df$pro_max_secengagement[df$pro_max_secengagement == 0] <- NA
df$pro_max_secengagement[df$pro_max_secengagement == 1] <- 0  #for non-physical state enforcement
df$pro_max_secengagement[df$pro_max_secengagement >= 2] <- 1  #for physical state enforcement

###how many cases of size and repression are documented
table(df$protest)
table(df$protest_number, exclude=NULL)
summary(df$max_numparticipants, exclude = NULL)
summary(df$max_secengagement, exclude = NULL)

#generate weekdays 
df$wday <- wday(as.Date(df$date,'%d-%m-%Y'), label=TRUE)
df$wday <- as.factor(df$wday)
```

Generate descriptive summary statistics and create lagged variables within country group. 

```{r}
summary(df)

###create lagged variables sorted by country
df <- 
  df%>%
  group_by(country) %>%
  mutate(lag_users_total = dplyr::lag(users_total, n = 1, default = NA),
         lag_users_relay = dplyr::lag(users_relay, n = 1, default = NA),
         lag_users_bridge = dplyr::lag(users_bridge, n = 1, default = NA),
         lag_protest = dplyr::lag(protest, n = 1, default = NA),
         lag_protest_2 = dplyr::lag(protest, n = 2, default = NA),
         lag_protest_number = dplyr::lag(protest_number, n = 1, default = NA),
         lag_protest_number_2 = dplyr::lag(protest_number, n = 2, default = NA),
         lag_max_numparticipants = dplyr::lag(max_numparticipants, n = 1, default = NA),
         lag_max_secengagement = dplyr::lag(max_secengagement, n = 1, default = NA),
         lag_pro_protest = dplyr::lag(pro_protest, n = 1, default = NA),
         lag_pro_protest_number = dplyr::lag(pro_protest_number, n = 1, default = NA),
         lag_pro_max_numparticipants = dplyr::lag(pro_max_numparticipants, n = 1, default = NA),
         lag_pro_max_secengagement = dplyr::lag(pro_max_secengagement, n = 1, default = NA),
         lag_scad_protest = dplyr::lag(scad_protest, n = 1, default = NA),
         lag_scad_protestnumber = dplyr::lag(scad_protestnumber, n = 1, default = NA),
         lag_disaster = dplyr::lag(disaster, n = 1, default = NA),
         lag_disaster_number = dplyr::lag(disaster_number, n = 1, default = NA))


         
```

First, we present the results of how protest influences Tor usage.
We operationalize the dependent variable 'Tor usage' as (1) total Tor usage, (2) relay usage, and (3) bridge usage.The independent variables are operationalized in the following ways: (1) protest occurrence, (2) number of protest occurrences, (3) number of participants, and (4) whether the government responds with physical intervention. For the last three variables, we create dummy variables, where we recode the variable into two groups based on a median split.


```{r}

###Modifications prior to analysis: Generate dummy variables 
###Create two dummy variables for protest numbers differentiating between a median split.
summary(df$lag_protest_number)

df$low_number <- 0
df$low_number[df$lag_protest_number == 1] <- 1

df$high_number <- 0
df$high_number[df$lag_protest_number > 1] <- 1

df$low_number_0 <- 0
df$low_number_0[df$protest_number == 1] <- 1

df$high_number_0 <- 0
df$high_number_0[df$protest_number > 1] <- 1

df$low_number_2 <- 0
df$low_number_2[df$lag_protest_number_2 == 1] <- 1

df$high_number_2 <- 0
df$high_number_2[df$lag_protest_number_2 > 1] <- 1


###Create two dummy variables for number of participants differentiating in small and large protests.
###Use a median split to separate small and large protests. 
summary(df$lag_max_numparticipants)

df$low_max_numparticipants <- 0
df$low_max_numparticipants[df$lag_max_numparticipants < 250] <- 1

df$high_max_numparticipants <- 0
df$high_max_numparticipants[df$lag_max_numparticipants >= 250] <- 1


###Create two dummy variables for non-physical and physical intervention.
summary(df$lag_max_secengagement)

df$non_violent <- 0
df$non_violent[df$lag_max_secengagement ==0] <- 1

df$violent <- 0
df$violent[df$lag_max_secengagement ==1] <- 1

###Summary statistics

df[,c("filter")] <- max(df$filter, na.rm = T) - df[,c("filter")]
df[,c("social_media")] <- max(df$social_media, na.rm = T) - df[,c("social_media")]

df_summary <- df[c("users_relay", "users_bridge", "users_total", "protest", "low_number", "high_number", "low_max_numparticipants", "high_max_numparticipants", "non_violent", "violent", "filter", "social_media")]

df_summary <- as.data.frame(df_summary)

stargazer(df_summary,
          title="Summary Statistics",
          out="appendix-A1.tex",
          float = F,
          digits = 2,
          omit.summary.stat = c("p25", "p75"),
          covariate.labels = c("Relay usage", "Bridge usage", "Total Tor usage", "Protest", "One protest", "Multiple protests", "Few participants","Several participants","Non-physical interv.", "Physical interv.", "Internet filtering", "Social media censorship"))



```

In the following, we demonstrate the effect of protest occurrence and the number of protests (split into 'one' and 'multiple protests') on Tor usage with a linear regression model. We show this effect for three types of Tor usage (total Tor usage, relay usage, bridge usage). In addition, we include the effect of participants ('few' and 'several participants') and governmental intervention ('non-physical' and 'physical intervention') on Tor usage. This leaves us with two tables per dependent variable, containing four models each.
For the dependent variable, we take the log base 10 plus 1 (as we have values of zero in the dependent variable). For the explanatory variables, we use the lagged dependent variable to account for serial autocorrelation. To account for heterogeneity, we use week, country and year fixed effects and week, country*year fixed effects to control for country-specific temporal trends. 
```{r, results='asis', warning=FALSE}
#Durbin-Watson test for autocorrelation for Model 1 and Model 3
#without lagged DV
dwtest(lm(log10(1+users_total) ~ lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df))

dwtest(lm(log10(1+users_total) ~ low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df))


#with DV t-1
dwtest(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df))

dwtest(lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df))


###ACF plot
model1 <- lm(log10(1+users_total) ~ lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df)

model2 <- lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df)

model3 <- lm(log10(1+users_total) ~ low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df)

model4 <- lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df)



pdf(file.path(file = "auto-1.pdf"), width=7,height=4,paper='special')
acf(model1$residuals, type = "correlation", main =  "")
dev.off()

pdf(file.path(file = "auto-2.pdf"), width=7,height=4,paper='special')
acf(model2$residuals, type = "correlation", main =  "")
dev.off()

pdf(file.path(file = "auto-3.pdf"), width=7,height=4,paper='special')
acf(model3$residuals, type = "correlation", main =  "")
dev.off()

pdf(file.path(file = "auto-4.pdf"), width=7,height=4,paper='special')
acf(model4$residuals, type = "correlation", main =  "")
dev.off()



```

```{r, results='asis', warning=FALSE}
##############################################
#Total Tor Usage 
##############################################
stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number + high_number + as.factor(country) *as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Protest and total Tor users",
          out="table-T1.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          single.row = T,
          no.space = F,
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)"))

        
          
stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_max_numparticipants + high_max_numparticipants + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_max_numparticipants + high_max_numparticipants + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + non_violent + violent + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + non_violent + violent + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Participation and repression on total Tor users",
          out="appendix-A2.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Few participants (t-1)",
                             "Several participants (t-1)",
                             "Non-physical interv. (t-1)",
                             "Physical interv. (t-1)"))




###################################
# Relay Usage
###################################

stargazer(lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + lag_protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + low_number + high_number + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Protest on relay usage",
          out="appendix-A3.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Relay usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Relay usage (log) (t-1)",
                             "Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)"))

stargazer(lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + low_max_numparticipants + high_max_numparticipants + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + low_max_numparticipants + high_max_numparticipants + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + non_violent + violent + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_relay) ~ log10(1+lag_users_relay) + non_violent + violent + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Participation and repression on relay usage",
          out="appendix-A4.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Relay usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Relay usage (log) (t-1)",
                             "Few participants (t-1)",
                             "Several participants (t-1)",
                             "Non-physical interv. (t-1)",
                             "Physical interv. (t-1)"))

###################################
#Bridge Usage
###################################

stargazer(lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + lag_protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + low_number + high_number + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Protest on bridge users",
          out="appendix-A5.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Bridge usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Bridge usage (log) (t-1)",
                             "Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)"))

stargazer(lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + low_max_numparticipants + high_max_numparticipants + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + low_max_numparticipants + high_max_numparticipants + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + non_violent + violent + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_bridge) ~ log10(1+lag_users_bridge) + non_violent + violent + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Participation and repression on bridge users",
          out="appendix-A6.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Bridge usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Bridge usage (log) (t-1)",
                             "Few participants (t-1)",
                             "Several participants (t-1)",
                             "Non-physical interv. (t-1)",
                             "Physical interv. (t-1)"))

###################################
# Independent variable at t0 and t2
###################################
stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number_0 + high_number_0 + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number_0 + high_number_0 + as.factor(country) *as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Protest and total Tor users at t0",
          out="appendix-A7.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t0)",
                             "One protest (t0)",
                             "Multiple protests (t0)"))


stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest_2 + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest_2 + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number_2 + high_number_2 + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number_2 + high_number_2 + as.factor(country) *as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Protest and total Tor users at t2",
          out="appendix-A8.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t-2)",
                             "One protest (t-2)",
                             "Multiple protests (t-2)"))


```
Second, we include robustness checks to support the results of our analysis from the first Table. More specifically, we repeat the analysis of the first four models. 

First, we exclude Tor artefacts from our analysis, thus we exclude missing values from Tor Metrics and observations where we expect a network overload in the Tor network. This network overload was especially prominent in Russia and UAE. This is why we exclude the case of Russia and UAE from the sample. 

```{r, results='asis', warning=FALSE}
###Exclude missing days from the analysis, as well as excluding Russia and UAE which experience a high number of Tor users around the network overload. 
df.robust <- df %>% filter(!date=="2011-09-03" & !date=="2012-01-08" & !date=="2016-09-18" & !country=="365" & !country=="696")

##############################################
#Total Tor Usage 
##############################################

stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df.robust),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df.robust),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number + high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df.robust),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + low_number + high_number + as.factor(country) *as.factor(year) + as.factor(wday), data=df.robust),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Tor artefacts - protest and total Tor users",
          out="appendix-A9.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)"))


          
          

```


As a second robustness check, we recreate the initial analysis with an alternative protest variable from the Social Conflict Analytical Database (SCAD). We expect that our results hold even with another source of protest variables. 

```{r}


###split number of SCAD protest into one and multiple protests 
table(df$lag_scad_protest)

df$scad_low_number <- 0
df$scad_low_number[df$lag_scad_protestnumber == 1] <- 1

df$scad_high_number <- 0
df$scad_high_number[df$lag_scad_protestnumber > 1] <- 1




```

```{r, results='asis', warning=FALSE}
##############################################
#Total Tor Usage - SCAD Data
##############################################
###sub-sample of only African and Latin American countries
df_scad <- df %>%
  filter(!(country==700 | country==371 | country==373 | country==771 | country==370 | country==811 |
             country==710 | country==372 | country==630 | country==663 | country==705 | country==690 |
             country==703 | country==812 | country==812 | country==820 | country==775 | country==790 |
             country==731 | country==698 | country==770 | country==365 | country==670 | country==830 |
             country==780 | country==652 | country==702 | country==800 | country==701 | country==696 |
             country==704 | country==816 | country==780))
  
           
stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_scad_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df_scad),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_scad_protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df_scad),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + scad_low_number + scad_high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df_scad),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + scad_low_number + scad_high_number + as.factor(country) *as.factor(year) + as.factor(wday), data=df_scad),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="SCAD and total Tor users",
          out="appendix-A10.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)"))
```
In addition, we control for reverse causality as a third robustness check. It might be that our findings are a result of the effect of Tor usage on protests (and not the other way around, as we expect in our study). Thus, we include the effect of Tor usage (total Tor usage, relay usage, bridge usage) on protest (protest occurrence, one protest, multiple protests) including weekday with country*year fixed effects. 

```{r, results='asis', warning=FALSE}
##############################################
#Reverse causality: Tor usage on protest 
##############################################

###Few protest = protest_number, low_number = lag_protest_number                             
df$few_protest <- 0
df$few_protest[df$protest_number == 1] <- 1

df$several_protest <- 0
df$several_protest[df$protest_number > 1] <- 1    

###Binomial Logistic Regression for Dichotomous Protest Variable (0/1)

stargazer(glm(protest ~ lag_protest + log10(1+lag_users_total) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(protest ~ lag_protest + log10(1+lag_users_relay) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(protest ~ lag_protest + log10(1+lag_users_bridge) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(few_protest ~ low_number + log10(1+lag_users_total) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(few_protest ~ low_number + log10(1+lag_users_relay) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(few_protest ~ low_number + log10(1+lag_users_bridge) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(several_protest ~ high_number + log10(1+lag_users_total) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(several_protest ~ low_number + log10(1+lag_users_relay) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          glm(several_protest ~ high_number + log10(1+lag_users_bridge) + as.factor(country)*as.factor(year) +  as.factor(wday), family = "binomial", data = df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Reverse causality",
          out="appendix-A11.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = c("Protest","One protest", "Multiple protests"),
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Country $\\times$ Year FEs?", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")),
          covariate.labels=c("Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)",
                             "Total Tor usage (log) (t-1)",
                             "Relay usgae (log) (t-1)",
                             "Bridge usage (log) (t-1)"))

                        
```


Third, we apply three Placebo tests. In the first one, we distribute the occurrence of multiple protests at random order among countries and years. For that, we create an alternative protest variable `placebo' which shares the same distribution of multiple protests but at random order. If then the results show no effect on Tor usage, we can be confident that a random order of protest occurrence is not linked to Tor usage. By this, we see that the order of protest occurrence is linked to an increase in Tor usage. (Due to the time consuming process, we provide the simulated coefficient estimates below. The simulation can be found in a separate file.)


Placebo test (short version: includes estimates of simulation)
```{r, out.width='100%', fig.align='center', fig.cap='...'}

load(file = "result_high.rda")
load(file = "result_high2.rda")

pdf(file.path(file = "tor-placebo-model1.pdf"), width=7,height=4,paper='special')
plot(density(result_high), main = "") + abline(v=0.013, col="red", lty=2)
dev.off()

pdf(file.path(file = "tor-placebo-model2.pdf"), width=7,height=4,paper='special')
plot(density(result_high2), main = "") + abline(v=0.011, col="red", lty=2)
dev.off()

quantile(result_high, c(.90, .95, .99)) # estimate at 0.013
quantile(result_high2, c(.90, .95, .99)) # estimate at 0.011
```

For the second placebo test, we only use pro-government protest events. We expect to see that pro-government protests should not have an effect on Tor usage. Those, who protest in favor of the government, are not in fear of prosecution and have thus not an inventive to turn to anonymity-preserving browsers.

```{r}

###Modifications prior to analysis: Generate dummy variables for pro-government protests
###Create two dummy variables for protest numbers differentiating between a median split.
summary(df$lag_pro_protest_number)

df$pro_low_number <- 0
df$pro_low_number[df$lag_pro_protest_number == 1] <- 1

df$pro_high_number <- 0
df$pro_high_number[df$lag_pro_protest_number > 1] <- 1



###Crosstab: pro-regime protests on days with anti-regime protests
table(df$protest)
table(df$protest, df$pro_protest)

pro_gov <- df %>% dplyr::select(protest, pro_protest)
pro_gov$protest <- ifelse(pro_gov$protest==1, "anti-gov protest (1)", "anti-gov protest (0)")
pro_gov$pro_protest <- ifelse(pro_gov$pro_protest==1, "pro-gov protest (1)", "pro-gov protest (0)")  

pro_gov <- pro_gov %>% tabyl(protest, pro_protest)

stargazer(as.data.frame(pro_gov), summary=FALSE, rownames = F,
          out="crosstab.tex", float = F)
```

Analysis of Pro-government protests on Tor usage. 
```{r, results='asis', warning=FALSE}
##############################################
#Total Tor Usage - Pro Government Protests
##############################################

stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_pro_protest + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_pro_protest + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + pro_low_number + pro_high_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + pro_low_number + pro_high_number + as.factor(country) *as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Pro-government protests and total Tor users",
          out="appendix-A12.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t-1)",
                             "One protest (t-1)",
                             "Multiple protests (t-1)"))
          
    
```

For the third Placebo test, we include natural disasters from the Emergency Event Database (EM-DAT). These events should be unrelated to patterns of Tor usage in the respective country. 

```{r, results='asis', warning=FALSE}
##############################################
#Total Tor Usage - EM-DAT
##############################################

stargazer(lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_disaster + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_disaster + as.factor(country)*as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_disaster_number + as.factor(country) + as.factor(year) + as.factor(wday), data=df),
          lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_disaster_number + as.factor(country) *as.factor(year) + as.factor(wday), data=df),
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Natural disasters and total Tor users",
          out="appendix-A13.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes", "Yes", "Yes"),
                           c("Country FEs?", "Yes", "No", "Yes", "No"),
                           c("Year FEs?", "Yes", "No", "Yes", "No"),
                           c("Country $\\times$ Year FEs?", "No", "Yes", "No", "Yes")),
          covariate.labels=c("Total tor usage (log) (t-1)",
                             "Disaster (t-1)",
                             "Severity (t-1)"))



```




Fourth, we demonstrate how digital media freedom in autocracies influences Tor usage. 
We include country-year data from the Digital Society Survey (VDem) on Government Internet filtering in practice, and Government social media shut down in practice. The variables are cross-order aggregated by using the Bayesian item response theory measurement model.

```{r, results='asis', warning=FALSE}


m1 <- lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest*filter + as.factor(wday), data=df)
m2 <- lm(log10(1+users_total) ~ log10(1+lag_users_total) + lag_protest*social_media + as.factor(wday), data=df)

stargazer(m1, m2,
          type = "html",
          star.char = c("*", "**"),
          star.cutoffs = c(0.05, 0.01),
          notes = c("*p$<$0.05; **p$<$0.01"),
          notes.append = F,
          title="Protest and censorship on total Tor users",
          out="table-T2.tex",
          float = F,
          omit="as.factor",
          omit.stat = c("ser", "f"),
          single.row = T,
          no.space = F,
          dep.var.labels = "Total Tor usage (log)",
          add.lines = list(c("Weekday FEs?", "Yes", "Yes")),
          covariate.labels=c("Total Tor usage (log) (t-1)",
                             "Protest (t-1)",
                             "Internet filtering",
                             "Social media censorship",
                             "Protest $\\times$ Internet filtering (t-1)",
                             "Protest $\\times$ Social media censorship (t-1)"))



```

Conditional Marginal Effects Plot
```{r, out.width='100%', fig.align='center', fig.cap='...'}

pdf(file.path(file = "marginal-effect-1.pdf"), width=7,height=4,paper='special')
interplot(m = m1, var1 = "lag_protest", var2 = "filter", hist = TRUE) +
  theme(legend.position="none") +   
  geom_hline(yintercept = 0, linetype = "dashed") + theme_minimal() +
  ylab("Estimated coefficient for Tor usage") + xlab("Internet filtering") 
dev.off()

pdf(file.path(file = "marginal-effect-2.pdf"), width=7,height=4,paper='special')
interplot(m = m2, var1 = "lag_protest", var2 = "social_media", hist = TRUE) +
  theme(legend.position="none") +   
  geom_hline(yintercept = 0, linetype = "dashed") + theme_minimal() +
  ylab("Estimated coefficient for Tor usage") + xlab("Social media censorship") 
dev.off()

```

Visualizations
```{r, out.width='100%', fig.align='center', fig.cap='...'}
df.all <- df %>% 
  group_by(date) %>%
  summarise(users_total=sum(users_total), users_relay=sum(users_relay), users_bridge=sum(users_bridge))


summary(df.all$users_relay)
summary(df.all$users_bridge)



###Tor timeline (Total Tor users)
pdf(file.path(file = "tor-timeline-total.pdf"), width=7,height=4,paper='special')
ggplot(df.all, aes(x=date, y=users_total)) +
  geom_line(color="grey") + 
  geom_smooth(aes(x=date, y=users_total), color="black", linetype = "solid", size=0.5) +
  xlab('Year') +
  ylab('Number of Tor users') +
  scale_y_continuous(labels = scales::comma)+
  theme_minimal()
dev.off()




###Tor timeline (relay and bridge users, separated)
pdf(file.path(file = "tor-timeline-relay.pdf"), width=7,height=4,paper='special')
ggplot(df.all, aes(x=date, y=users_relay)) +
  geom_line(color="grey") + 
  geom_smooth(aes(x=date, y=users_relay), color="black", linetype = "solid", size=0.5) +
  xlab('Year') +
  ylab('Number of relay users') +
  scale_y_continuous(labels = scales::comma)+
  theme_minimal()
dev.off()

pdf(file.path(file = "tor-timeline-bridge.pdf"), width=7,height=4,paper='special')
ggplot(df.all, aes(x=date, y=users_bridge)) +
  geom_line(color="grey") + 
  geom_smooth(aes(x=date, y=users_bridge), color="black", linetype = "solid", size=0.5) +
  xlab('Year') +
  ylab('Number of bridge users') +
  scale_y_continuous(labels = scales::comma)+
  theme_minimal() 
dev.off()




```





